Using e-commerce data, build time series models to predict the price of a certain product 4 weeks in advance. And finally evaluate each of the algorithms.
rm(list=ls(all=TRUE))library(zoo)
library(dplyr)
library(TTR)
library(forecast)
library(DMwR)
#library(data.table)data = readRDS("ecommerceData.RData")## Dimension of the Data set
dim(data)[1] 3316190 6
## Look at the summary statistics
summary(data) V1 TitleKey Price Quantity Condition Date
Length:3316190 Min. : 221205 Min. : 0.01 Min. : 1.00 Length:3316190 Length:3316190
Class :character 1st Qu.:1561944 1st Qu.: 63.97 1st Qu.: 1.00 Class :character Class :character
Mode :character Median :4302628 Median : 86.85 Median : 1.00 Mode :character Mode :character
Mean :4407778 Mean : 91.82 Mean : 30.77
3rd Qu.:6989428 3rd Qu.: 113.93 3rd Qu.: 3.00
Max. :9186772 Max. :2545.21 Max. :999.00
## As it is not very clear, lets look at the first and last 10 records using head and tail commands
head(data, 10)tail(data, 10)## Look into Condition attribute
table(data$Condition)
Acceptable Good Mint New Very Good
284251 778239 280389 1593241 380070
## Look into Titlekey attribute
table(data$TitleKey)
221205 273693 315169 367034 432113 433756 490823 506270 842501 879967 1018776 1091846 1118812 1123511 1138845 1140553
32051 31991 32866 32574 32699 31663 32820 32597 32561 32724 33243 32307 32454 32217 30684 32359
1198056 1205266 1288607 1462143 1464896 1515222 1523071 1532705 1541863 1561944 1619727 1644625 1707665 1752438 2096577 2231521
33702 32870 31814 31199 32598 32661 33088 32136 33188 31879 32614 20176 32729 32794 31656 32727
2422979 2437264 2443602 2446225 2450811 2451420 2660657 2773679 2808755 2871259 2873279 3019001 3119793 3120432 3121347 3128336
32698 26246 32420 32333 33052 32394 34068 29268 32905 32422 32431 32318 32415 32534 33361 32565
3128938 4283861 4289323 4302628 4389202 4456969 4505684 4511602 4676989 4687744 4688228 4690721 4702308 4705669 4730066 4735628
31071 33981 32555 33807 32563 31470 32194 32530 31951 32336 32509 32344 32884 32940 32566 26243
4770054 4793422 4862728 5124701 5294728 5724329 6252362 6254632 6399602 6485311 6760671 6871280 6896787 6915669 6989428 8044330
32601 32161 16454 32260 32343 29561 31273 32672 31525 29305 28499 30498 32065 31715 29433 31558
8045088 8046315 8046648 8046762 8048046 8049481 8050017 8366789 8374241 8768865 8772755 8778959 8787038 8799748 8830516 8896060
31633 29719 29578 30837 30827 31212 30522 29475 33082 28739 32956 28705 27884 30325 28957 26103
8944101 8946697 8973539 8977907 9000092 9010840 9062282 9078964 9117389 9149582 9186772
28412 25816 28363 27366 27926 28191 26821 27455 26095 25606 26647
## Find out number of the TitleKeys
length(unique(data$TitleKey))[1] 107
## Confirm whether V1 is unique for each of the record
length(unique(data$V1))[1] 3316190
data$V1 = NULL
data$TitleKey = as.factor(data$TitleKey)
data$Price = as.numeric(data$Price)
data$Quantity = as.numeric(data$Quantity)
data$Condition = as.factor(data$Condition)
data$Date = as.Date(data$Date, format="%Y-%m-%d")# Summary of the data
summary(data) TitleKey Price Quantity Condition Date
2660657: 34068 Min. : 0.01 Min. : 1.00 Acceptable: 284251 Min. :2009-01-01
4283861: 33981 1st Qu.: 63.97 1st Qu.: 1.00 Good : 778239 1st Qu.:2010-06-14
4302628: 33807 Median : 86.85 Median : 1.00 Mint : 280389 Median :2011-08-31
1198056: 33702 Mean : 91.82 Mean : 30.77 New :1593241 Mean :2011-08-17
3121347: 33361 3rd Qu.: 113.93 3rd Qu.: 3.00 Very Good : 380070 3rd Qu.:2012-10-23
1018776: 33243 Max. :2545.21 Max. :999.00 Max. :2013-12-02
(Other):3114028
# Re-look at the first 6 records
head(data)data = data[data$TitleKey==6989428 & data$Condition=="Good",]dim(data)[1] 5848 5
summary(data) TitleKey Price Quantity Condition Date
6989428:5848 Min. : 39.84 Min. : 1.000 Acceptable: 0 Min. :2009-04-15
221205 : 0 1st Qu.: 60.00 1st Qu.: 1.000 Good :5848 1st Qu.:2011-01-26
273693 : 0 Median : 72.25 Median : 1.000 Mint : 0 Median :2012-03-01
315169 : 0 Mean : 73.30 Mean : 3.334 New : 0 Mean :2011-12-31
367034 : 0 3rd Qu.: 83.75 3rd Qu.: 3.000 Very Good : 0 3rd Qu.:2013-01-08
432113 : 0 Max. :999.00 Max. :68.000 Max. :2013-12-02
(Other): 0
head(data)data = data[order(data$Date, decreasing=F), ]
head(data,10)data = data %>% group_by(Date) %>% summarise("MinPrice" = min(Price))
class(data)[1] "tbl_df" "tbl" "data.frame"
typeof(data)[1] "list"
data = data.frame(data)
head(data,10)typeof(data)[1] "list"
minDate = min(data$Date)
maxDate = max(data$Date)
seq = data.frame("DateRange"=seq(minDate, maxDate, by="days"))
seqdata = seq %>% full_join(data, c("DateRange" = "Date"))
datadata = data.frame(data)
datarm(minDate, maxDate, seq)
head(data)data$MinPrice = (na.locf(data$MinPrice) +
rev(na.locf(rev(data$MinPrice))))/2
head(data)plot(data$MinPrice, type = 'l')# Derive Year and Month attribute
data$Year = as.numeric(format(data$DateRange, format="%Y"))
data$Month = as.numeric(format(data$DateRange, format="%m"))
data = data %>% group_by(Year, Month) %>% summarise("MeanPrice" = mean(MinPrice))
data = data.frame(data)
# Creating sequence Time variable.
data$Time = 1:nrow(data)
datadata$Month = as.factor(data$Month)
data$Year = NULL
head(data)plot(data$MeanPrice, type = 'l')View(data)dim(data)[1] 57 3
train = data[1:45,]
test = data[45:nrow(data),]
rm(data)train_TS <- ts(train$MeanPrice, frequency = 12, start = c(2009, 4))
train_TS Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2009 89.33187 98.98226 111.71333 109.58000 99.34645 88.61500 87.52210 83.59500 86.47677
2010 94.56161 82.45250 79.88065 77.42767 80.44161 78.68367 83.06452 98.83145 92.20900 80.77274 80.69867 82.61000
2011 88.42161 72.58196 70.64968 59.03067 60.91677 72.91667 78.18194 91.21645 76.70200 68.08935 64.75600 71.52661
2012 70.53839 50.55517 47.01290 56.50133 62.88645 70.15700 72.60000 74.79161 59.73300 54.82694 54.71333 58.62968
test_TS <- ts(test$MeanPrice, frequency = 12, start = c(2013, 1))
test_TS Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2013 58.62968 63.73032 54.42857 44.01097 48.79200 56.64129 63.58633 61.78355 74.99113 66.84800 41.66387 50.78450
2014 49.10000
plot(train_TS,
type="l", lwd=3, col="blue",
xlab="Monthly", ylab="Mean Price",
main=" Aggregated Monthly Price Time series plot of 6989428 product - (Condition: Good)")plot(test_TS, col="red", lwd=3)train_Decomposed = decompose(train_TS)
plot(train_Decomposed)rm(train_Decomposed)par(mfrow=c(1,1))
plot(diff(train_TS, lag=1), type="l")Acf(train_TS,lag=44)Pacf(train_TS,lag=44)Ideal Random : A spike may or may not be present; even if present, magnitude will be small
Looking at the Y scale in ACF we observe that both the presents of both trend and seasonality.
par(mfrow=c(1,1))
plot(diff(train_TS, lag=1), type="l")Acf(diff(train_TS,lag=1), lag=43) Pacf(diff(train_TS, lag=1),lag=43)ndiffs(train_TS)[1] 1
nsdiffs(train_TS)[1] 1
fitsma = SMA(train_TS, n=2)
predsma = forecast(fitsma, h=13)
plot(predsma)smaTrainError = regr.eval(train_TS[2:length(train_TS)], fitsma[2:length(train_TS)])
smaTestError = regr.eval(test$MeanPrice, predsma$mean)
smaTrainError mae mse rmse mape
3.41443861 18.01882062 4.24485814 0.04614223
smaTestError mae mse rmse mape
7.7553174 85.4922508 9.2462020 0.1425691
fitwma = WMA(train_TS, n=2, 1:2)
predwma = forecast(fitwma, h=13)Missing values encountered. Using longest contiguous portion of time series
plot(predwma)wmaTrainError = regr.eval(train_TS[2:length(train_TS)], fitwma[2:length(train_TS)])
wmaTestError = regr.eval(test$MeanPrice, predwma$mean)
wmaTrainError mae mse rmse mape
2.27629241 8.00836472 2.82990543 0.03076148
wmaTestError mae mse rmse mape
7.8055217 86.0926104 9.2786104 0.1450969
fitEma = EMA(train_TS, n=2)
predema = forecast(fitEma, h=13)Missing values encountered. Using longest contiguous portion of time series
plot(predema)emaTrainError = regr.eval(train_TS[2:length(train_TS)], fitEma[2:length(train_TS)])
emaTestError = regr.eval(test$MeanPrice, predema$mean)
emaTrainError mae mse rmse mape
2.61643350 9.63456879 3.10396018 0.03564705
emaTestError mae mse rmse mape
7.8334700 86.7959612 9.3164350 0.1465041
## Simple linear regression
lm1 <- lm(MeanPrice~Time, data = train)
pred_Train = predict(lm1)
pred_Test = predict(lm1, test)
plot(train$MeanPrice, type="l")
points(train$Time, pred_Train, type="l", col="red", lwd=2)lm1TrainError = regr.eval(train$MeanPrice, pred_Train)
lm1TestError = regr.eval(test$MeanPrice,pred_Test)
lm1TrainError mae mse rmse mape
7.03431426 77.24627672 8.78898610 0.09489479
lm1TestError mae mse rmse mape
8.5976305 122.1112654 11.0503966 0.1436925
lm2 <- lm(MeanPrice~poly(Time, 2, raw=TRUE), data = train)
summary(lm2)
Call:
lm(formula = MeanPrice ~ poly(Time, 2, raw = TRUE), data = train)
Residuals:
Min 1Q Median 3Q Max
-18.0156 -5.2641 -0.6029 5.1536 20.4226
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 100.945771 4.233658 23.844 < 2e-16 ***
poly(Time, 2, raw = TRUE)1 -1.213800 0.424531 -2.859 0.00659 **
poly(Time, 2, raw = TRUE)2 0.006003 0.008948 0.671 0.50601
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 9.049 on 42 degrees of freedom
Multiple R-squared: 0.6611, Adjusted R-squared: 0.645
F-statistic: 40.97 on 2 and 42 DF, p-value: 1.35e-10
pred_Train = predict(lm2)
pred_Test = predict(lm2, test)
plot(train$MeanPrice, type="l")
points(train$Time, pred_Train, type="l", col="red", lwd=2)lm2TrainError = regr.eval(train$MeanPrice, pred_Train)
lm2TestError = regr.eval(test$MeanPrice, pred_Test)
lm2TrainError mae mse rmse mape
7.00157265 76.42743353 8.74227851 0.09451255
lm2TestError mae mse rmse mape
7.3643957 89.8083582 9.4767272 0.1292752
str(train)'data.frame': 45 obs. of 3 variables:
$ Month : Factor w/ 12 levels "1","2","3","4",..: 4 5 6 7 8 9 10 11 12 1 ...
$ MeanPrice: num 89.3 99 111.7 109.6 99.3 ...
$ Time : int 1 2 3 4 5 6 7 8 9 10 ...
slm1 <- lm(MeanPrice~., data=train)
pred_Train = predict(slm1)
pred_Test = predict(slm1, test)
plot(train$MeanPrice, type="l")
points(train$Time, pred_Train, type="l", col="red", lwd=2)slm1TrainError = regr.eval(train$MeanPrice, pred_Train)
slm1TestError = regr.eval(test$MeanPrice, pred_Test)
slm1TrainError mae mse rmse mape
4.10150034 24.54719694 4.95451279 0.05388787
slm1TestError mae mse rmse mape
6.4118755 61.1652110 7.8208191 0.1091351
str(train)'data.frame': 45 obs. of 3 variables:
$ Month : Factor w/ 12 levels "1","2","3","4",..: 4 5 6 7 8 9 10 11 12 1 ...
$ MeanPrice: num 89.3 99 111.7 109.6 99.3 ...
$ Time : int 1 2 3 4 5 6 7 8 9 10 ...
slm3 <- lm(MeanPrice~Month, data=train)
pred_Train = predict(slm3)
pred_Test = predict(slm3, test)
plot(train$MeanPrice, type="l")
points(train$Time, pred_Train, type="l", col="red", lwd=2)model_HW = HoltWinters(train_TS)
model_HWHolt-Winters exponential smoothing with trend and additive seasonal component.
Call:
HoltWinters(x = train_TS)
Smoothing parameters:
alpha: 0.3405816
beta : 0
gamma: 1
Coefficients:
[,1]
a 57.1567955
b -0.7574812
s1 3.4571409
s2 -12.4813346
s3 -12.2554308
s4 -6.4673170
s5 -3.0384529
s6 3.1260392
s7 5.3132461
s8 11.1305771
s9 -0.2680514
s10 -3.6813084
s11 -3.4357444
s12 1.4728819
## If you want a non-seasonal model?
# holtpriceforecast_NoS = HoltWinters(train_TS, gamma = FALSE)
## Model without trend and seasonality?
# holtpriceforecast_NoTS = HoltWinters(train_TS, beta = FALSE, gamma = FALSE)# Additive Model
model_HW_Add = HoltWinters(train_TS, seasonal="additive")
model_HW_AddHolt-Winters exponential smoothing with trend and additive seasonal component.
Call:
HoltWinters(x = train_TS, seasonal = "additive")
Smoothing parameters:
alpha: 0.3405816
beta : 0
gamma: 1
Coefficients:
[,1]
a 57.1567955
b -0.7574812
s1 3.4571409
s2 -12.4813346
s3 -12.2554308
s4 -6.4673170
s5 -3.0384529
s6 3.1260392
s7 5.3132461
s8 11.1305771
s9 -0.2680514
s10 -3.6813084
s11 -3.4357444
s12 1.4728819
# Multiplicative Model
model_HW_Mul = HoltWinters(train_TS, seasonal="multiplicative")
model_HW_MulHolt-Winters exponential smoothing with trend and multiplicative seasonal component.
Call:
HoltWinters(x = train_TS, seasonal = "multiplicative")
Smoothing parameters:
alpha: 0.3270572
beta : 0.004681688
gamma: 1
Coefficients:
[,1]
a 57.3094792
b -0.7944739
s1 1.0445773
s2 0.8040731
s3 0.8031275
s4 0.9141645
s5 0.9708252
s6 1.0647066
s7 1.0942112
s8 1.1666257
s9 0.9799104
s10 0.9276652
s11 0.9386134
s12 1.0230363
pred_train_HW = data.frame(model_HW_Mul$fitted)
head(pred_train_HW$xhat)[1] 82.57879 83.70974 80.38147 83.91881 99.38674 92.57758
pred_test_HW = forecast(model_HW_Mul, h = 12)
head(pred_test_HW)$method
[1] "HoltWinters"
$model
Holt-Winters exponential smoothing with trend and multiplicative seasonal component.
Call:
HoltWinters(x = train_TS, seasonal = "multiplicative")
Smoothing parameters:
alpha: 0.3270572
beta : 0.004681688
gamma: 1
Coefficients:
[,1]
a 57.3094792
b -0.7944739
s1 1.0445773
s2 0.8040731
s3 0.8031275
s4 0.9141645
s5 0.9708252
s6 1.0647066
s7 1.0942112
s8 1.1666257
s9 0.9799104
s10 0.9276652
s11 0.9386134
s12 1.0230363
$level
[1] 80 95
$mean
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2013 59.03429 44.80338 44.11263 49.48517 51.78101 55.94249 56.62342 59.44388 49.15154 45.79395 45.58870 48.87637
$lower
80% 95%
[1,] 51.86182 48.06494
[2,] 37.40503 33.48858
[3,] 36.34733 32.23662
[4,] 41.10535 36.66934
[5,] 42.88818 38.18059
[6,] 46.36776 41.29921
[7,] 46.61363 41.31476
[8,] 48.78145 43.13710
[9,] 39.16750 33.88227
[10,] 35.79150 30.49652
[11,] 35.22796 29.74332
[12,] 36.27018 29.59686
$upper
80% 95%
[1,] 66.20677 70.00365
[2,] 52.20173 56.11818
[3,] 51.87793 55.98863
[4,] 57.86499 62.30100
[5,] 60.67385 65.38143
[6,] 65.51722 70.58578
[7,] 66.63321 71.93207
[8,] 70.10631 75.75066
[9,] 59.13557 64.42080
[10,] 55.79640 61.09138
[11,] 55.94944 61.43408
[12,] 61.48256 68.15588
pred_test_HW$mean Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
2013 59.03429 44.80338 44.11263 49.48517 51.78101 55.94249 56.62342 59.44388 49.15154 45.79395 45.58870 48.87637
plot(pred_test_HW)# Make your own function
MAPE = function(predicted, actual){
mean(abs((actual[1:length(actual)]-predicted[1:length(actual)])/actual[1:length(actual)]))
}
#Or use accuracy() from Forecast package
HW_MAPE = accuracy(test$MeanPrice, pred_test_HW$mean)
HW_MAPE ME RMSE MAE MPE MAPE ACF1 Theil's U
Test set -6.271115 11.86086 8.403225 -13.28612 17.64101 0.1472497 2.148592
hw_NT_NS = HoltWinters(train_TS, beta=F, gamma=F)
hw_NT_NSHolt-Winters exponential smoothing without trend and without seasonal component.
Call:
HoltWinters(x = train_TS, beta = F, gamma = F)
Smoothing parameters:
alpha: 0.9999339
beta : FALSE
gamma: FALSE
Coefficients:
[,1]
a 58.62942
head(train_TS) Apr May Jun Jul Aug Sep
2009 89.33187 98.98226 111.71333 109.58000 99.34645 88.61500
head(hw_NT_NS$fitted) xhat level
May 2009 89.33187 89.33187
Jun 2009 98.98162 98.98162
Jul 2009 111.71249 111.71249
Aug 2009 109.58014 109.58014
Sep 2009 99.34713 99.34713
Oct 2009 88.61571 88.61571
predhw_NT_NS = predict(hw_NT_NS, 13, prediction.interval = TRUE)
head(predhw_NT_NS) fit upr lwr
Jan 2013 58.62942 75.40454 41.85430
Feb 2013 58.62942 82.35224 34.90660
Mar 2013 58.62942 87.68350 29.57534
Apr 2013 58.62942 92.17800 25.08084
May 2013 58.62942 96.13775 21.12109
Jun 2013 58.62942 99.71764 17.54119
regr.eval(train$MeanPrice[2:length(train$MeanPrice)], hw_NT_NS$fitted[,1]) mae mse rmse mape
6.82903964 72.07673568 8.48980186 0.09228698
regr.eval(test$MeanPrice, predhw_NT_NS[,1]) mae mse rmse mape
7.905941 89.849651 9.478906 0.150153
plot(hw_NT_NS, predhw_NT_NS)hw_T_NS = HoltWinters(train_TS, beta=T, gamma=F)
hw_T_NSHolt-Winters exponential smoothing with trend and without seasonal component.
Call:
HoltWinters(x = train_TS, beta = T, gamma = F)
Smoothing parameters:
alpha: 0.9137157
beta : TRUE
gamma: FALSE
Coefficients:
[,1]
a 58.224365
b 4.159942
head(train_TS) Apr May Jun Jul Aug Sep
2009 89.33187 98.98226 111.71333 109.58000 99.34645 88.61500
head(hw_T_NS$fitted) xhat level trend
Jun 2009 108.63264 98.98226 9.6503831
Jul 2009 123.91278 111.44752 12.4652599
Aug 2009 110.18587 110.81669 -0.6308243
Sep 2009 89.74675 100.28172 -10.5349705
Oct 2009 77.14358 88.71265 -11.5690707
Nov 2009 84.54054 86.62659 -2.0860586
predhw_T_NS = predict(hw_T_NS, 13, prediction.interval = TRUE)
head(predhw_T_NS) fit upr lwr
Jan 2013 62.38431 83.94181 40.826807
Feb 2013 66.54425 111.45171 21.636787
Mar 2013 70.70419 144.92398 -3.515601
Apr 2013 74.86413 183.10643 -33.378164
May 2013 79.02408 225.36652 -67.318365
Jun 2013 83.18402 271.28957 -104.921536
regr.eval(train$MeanPrice[3:length(train$MeanPrice)], hw_T_NS$fitted[,1]) mae mse rmse mape
8.8263311 118.1825083 10.8711779 0.1230179
regr.eval(test$MeanPrice, predhw_T_NS[,1]) mae mse rmse mape
30.8062521 1306.7960251 36.1496338 0.5948146
plot(hw_T_NS, predhw_T_NS)hw_T_S = HoltWinters(train_TS, beta=T, gamma=T)
hw_T_S = HoltWinters(train_TS)
hw_T_SHolt-Winters exponential smoothing with trend and additive seasonal component.
Call:
HoltWinters(x = train_TS)
Smoothing parameters:
alpha: 0.3405816
beta : 0
gamma: 1
Coefficients:
[,1]
a 57.1567955
b -0.7574812
s1 3.4571409
s2 -12.4813346
s3 -12.2554308
s4 -6.4673170
s5 -3.0384529
s6 3.1260392
s7 5.3132461
s8 11.1305771
s9 -0.2680514
s10 -3.6813084
s11 -3.4357444
s12 1.4728819
head(train_TS) Apr May Jun Jul Aug Sep
2009 89.33187 98.98226 111.71333 109.58000 99.34645 88.61500
head(hw_T_S$fitted) xhat level trend season
Apr 2010 83.16992 90.99995 -0.7574812 -7.0725445
May 2010 83.87259 88.28676 -0.7574812 -3.6566946
Jun 2010 80.47043 86.36075 -0.7574812 -5.1328446
Jul 2010 83.90221 84.99474 -0.7574812 -0.3350463
Aug 2010 99.29347 83.95195 -0.7574812 16.0989949
Sep 2010 92.55208 83.03712 -0.7574812 10.2724392
predhw_T_S = predict(hw_T_S, 13, prediction.interval = TRUE)
head(predhw_T_S) fit upr lwr
Jan 2013 59.85646 71.24613 48.46678
Feb 2013 43.16050 55.19264 31.12836
Mar 2013 42.62892 55.27091 29.98693
Apr 2013 47.65955 60.88330 34.43581
May 2013 50.33094 64.11190 36.54997
Jun 2013 55.73795 70.05446 41.42144
regr.eval(train$MeanPrice[13:length(train$MeanPrice)], hw_T_S$fitted[,1]) mae mse rmse mape
4.70021828 33.36585846 5.77631876 0.07177072
regr.eval(test$MeanPrice, predhw_T_S[,1]) mae mse rmse mape
7.6194583 130.7485752 11.4345343 0.1225012
plot(hw_T_NS, predhw_T_NS)hw = HoltWinters(train_TS)
hwHolt-Winters exponential smoothing with trend and additive seasonal component.
Call:
HoltWinters(x = train_TS)
Smoothing parameters:
alpha: 0.3405816
beta : 0
gamma: 1
Coefficients:
[,1]
a 57.1567955
b -0.7574812
s1 3.4571409
s2 -12.4813346
s3 -12.2554308
s4 -6.4673170
s5 -3.0384529
s6 3.1260392
s7 5.3132461
s8 11.1305771
s9 -0.2680514
s10 -3.6813084
s11 -3.4357444
s12 1.4728819
head(train_TS) Apr May Jun Jul Aug Sep
2009 89.33187 98.98226 111.71333 109.58000 99.34645 88.61500
head(hw$fitted) xhat level trend season
Apr 2010 83.16992 90.99995 -0.7574812 -7.0725445
May 2010 83.87259 88.28676 -0.7574812 -3.6566946
Jun 2010 80.47043 86.36075 -0.7574812 -5.1328446
Jul 2010 83.90221 84.99474 -0.7574812 -0.3350463
Aug 2010 99.29347 83.95195 -0.7574812 16.0989949
Sep 2010 92.55208 83.03712 -0.7574812 10.2724392
predhw = predict(hw, 13, prediction.interval = TRUE)
head(predhw) fit upr lwr
Jan 2013 59.85646 71.24613 48.46678
Feb 2013 43.16050 55.19264 31.12836
Mar 2013 42.62892 55.27091 29.98693
Apr 2013 47.65955 60.88330 34.43581
May 2013 50.33094 64.11190 36.54997
Jun 2013 55.73795 70.05446 41.42144
regr.eval(train$MeanPrice[13:length(train$MeanPrice)], hw$fitted[,1]) mae mse rmse mape
4.70021828 33.36585846 5.77631876 0.07177072
regr.eval(test$MeanPrice, predhw[,1]) mae mse rmse mape
7.6194583 130.7485752 11.4345343 0.1225012
plot(hw, predhw)par(mfrow=c(1,1))
plot(train_TS)train_TS_diff = diff(train_TS, lag=1)
plot(train_TS_diff, type="l")Acf(train_TS_diff,lag=44)Pacf(train_TS_diff,lag=44)ndiffs(train_TS)
nsdiffs(train_TS)par(mfrow=c(1,3))
plot(diff(train_TS, lag=12),type="l")
Acf(diff(train_TS,lag = 12),lag=40)
Pacf(diff(train_TS,lag = 12),lag=40)par(mfrow=c(1,3))
plot(diff(diff(train_TS, lag=12), lag=1),type="l")
Acf(diff(diff(train_TS, lag=12), lag=1),lag=40)
Pacf(diff(diff(train_TS, lag=12), lag=1),lag=40)ARIMA_1 = Arima(train_TS, c(0,1,0), c(0,1,0))
summary(ARIMA_1)par(mfrow = c(1,2))
Acf(ARIMA_1$residuals)
Pacf(ARIMA_1$residuals)
ARIMA_1 = Arima(train_TS, c(0,1,2), c(0,1,0)) # Try different models.
summary(ARIMA_1)par(mfrow = c(1,2))
Acf(ARIMA_1$residuals)
Pacf(ARIMA_1$residuals)Box.test(ARIMA_1$residuals, type="Ljung-Box", lag = 24)
#Null Hypothesis : No serial correlation upto 24 lags
# No significant spikespred_Train = fitted(ARIMA_1)
pred_Train
pred_Test = forecast(ARIMA_1, h = 13)
plot(pred_Test)
regr.eval(train$MeanPrice, pred_Train)
regr.eval(test$MeanPrice, data.frame(pred_Test)$Point.Forecast)ARIMA_auto = auto.arima(train_TS)
summary(ARIMA_auto)pred_Train = fitted(ARIMA_auto)
pred_Train
pred_Test = forecast(ARIMA_auto, h=13)
pred_Test
plot(pred_Test)par(mfrow = c(1,2))
Acf(ARIMA_1$residuals)
Pacf(ARIMA_1$residuals)
regr.eval(train$MeanPrice, pred_Train)
regr.eval(test$MeanPrice, data.frame(pred_Test)$Point.Forecast)